library(tidyverse)
library(broom)
library(kableExtra)
library(scales)
library(dplyr)
library(mclogit)




load("california.Rds")
california_logit = california

#
#1) California Model - full control set
#
model_c2 = mblogit(factor(label) ~ dem_share + YEAR + ln_population + white + 
                     black + asian + hispanic + bachelors + log(income) +
                     management_cen + service_cen + sales_cen + construction_cen + 
                     selfemploy_cen + nonprofit_cen + govt_cen,  
                   data = california_logit)



ndata_c2 = data.frame(dem_share = seq(0,1, by = 0.05), 
                      YEAR = factor(2020, levels = levels(california_logit$YEAR)[-25]),
                      ln_population = median(california_logit$ln_population),
                      white = median(california_logit$white),
                      black = median(california_logit$black),
                      asian = median(california_logit$asian),
                      hispanic = median(california_logit$hispanic),
                      bachelors = median(california_logit$bachelors),
                      income = median(california_logit$income),
                      management_cen = median(california_logit$management_cen),
                      service_cen = median(california_logit$service_cen),
                      sales_cen = median(california_logit$sales_cen),
                      construction_cen = median(california_logit$construction_cen),
                      selfemploy_cen = median(california_logit$selfemploy_cen),
                      nonprofit_cen = median(california_logit$nonprofit_cen),
                      govt_cen = median(california_logit$govt_cen))



#two separate graphs 
c2_data = cbind(predict(model_c2, newdata = ndata_c2, se.fit = TRUE, type = "response")$fit %>% 
        data.frame() %>% 
        mutate(dem_share = seq(0,1, by = 0.05)) %>% 
        pivot_longer(cols = c(1:9), names_to = "cat", values_to = "share"),
      predict(model_c2, newdata = ndata_c2, se.fit = TRUE, type = "response")$se.fit %>% 
        data.frame() %>% 
        pivot_longer(cols = everything(), names_to = "cat", values_to = "se") %>% 
        select(se)) %>% 
  mutate(cat = str_replace_all(cat, "\\.", " "))

r_plot = ggplot(c2_data |>
         filter(cat %in% c("Politician or Staff Member", 
                           "Lawyer",
                           "Business Owner Executive",
                           "Business Employee",
                           "Military or Law Enforcement")), 
       aes(x = dem_share, y = share, color = cat)) + 
  geom_point(aes(shape = cat), size = 2) +
  geom_smooth(size = 0.5) +
  geom_ribbon(
    aes(
      ymin = share-(1.96*se),
      ymax = share+(1.96*se),
      color = cat,
      fill = cat
    ),
    alpha = 0.4
  ) + 
  labs(color= "Category", 
       fill = "Category", 
       shape = "Category")+ 
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) + 
  scale_x_continuous(labels = scales::percent) + 
  ylab("Share of City Council Chandidates") + 
  xlab("Democratic Voteshare")+
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.position = "bottom") + 
  scale_color_manual(
    values = c("Politician or Staff Member" = "black",
               "Lawyer" = "grey",
               "Business Owner Executive" = "#FF787F",
               "Business Employee" = "#FF001A",
               "Military or Law Enforcement" = "#CD4811"),
    breaks = c("Politician or Staff Member", 
    "Lawyer",
    "Business Owner Executive",
    "Business Employee",
    "Military or Law Enforcement")
  ) + 
  scale_fill_manual(
    values = c("Politician or Staff Member" = "black",
               "Lawyer" = "grey",
               "Business Owner Executive" = "#FF787F",
               "Business Employee" = "#FF001A",
               "Military or Law Enforcement" = "#CD4811"),
    breaks = c("Politician or Staff Member", 
               "Lawyer",
               "Business Owner Executive",
               "Business Employee",
               "Military or Law Enforcement")
  )+
  scale_shape_manual(
    values = c("Politician or Staff Member" = 1,
               "Lawyer" = 2,
               "Business Owner Executive" = 3,
               "Business Employee" = 4,
               "Military or Law Enforcement" = 5),
    breaks = c("Politician or Staff Member", 
               "Lawyer",
               "Business Owner Executive",
               "Business Employee",
               "Military or Law Enforcement")
  )+
  guides(
    fill = guide_legend(ncol=3),
    color= guide_legend(ncol=3),
    shape = guide_legend(ncol=3)
  )
    


d_plot = ggplot(c2_data |>
         mutate(cat = ifelse(cat == "Non Profit Worker",
                             "Non-Profit Worker",
                             cat)) |> 
         filter(cat %in% c("Politician or Staff Member", 
                           "Lawyer",
                           "Non-Profit Worker",
                           "Service Based Professional")), 
       aes(x = dem_share, y = share, color = cat)) + 
  geom_point(aes(shape = cat), size = 2) +
  geom_smooth(size = 0.5) +
  geom_ribbon(
    aes(
      ymin = share-(1.96*se),
      ymax = share+(1.96*se),
      color = cat,
      fill = cat
    ),
    alpha = 0.4
  ) + 
  labs(color= "Category", 
       fill = "Category", 
       shape = "Category") + 
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) + 
  scale_x_continuous(labels = scales::percent) + 
  ylab("Share of City Council Chandidates") + 
  xlab("Democratic Voteshare")+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 14),
        title = element_text(size = 16),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.position = "bottom",
        legend.direction = "horizontal") + 
  scale_color_manual(
    values = c("Politician or Staff Member" = "black",
               "Lawyer" = "grey",
               "Non-Profit Worker" = "#4148E8",
               "Service Based Professional" = "#2FBCE8",
               "Technical Professional" = "#A5BCE8"),
    breaks = c("Politician or Staff Member", 
               "Lawyer",
               "Non-Profit Worker",
               "Service Based Professional",
               "Technical Professional")
  ) + 
  scale_fill_manual(
    values = c("Politician or Staff Member" = "black",
               "Lawyer" = "grey",
               "Non-Profit Worker" = "#4148E8",
               "Service Based Professional" = "#2FBCE8",
               "Technical Professional" = "#A5BCE8"),
    breaks = c("Politician or Staff Member", 
               "Lawyer",
               "Non-Profit Worker",
               "Service Based Professional",
               "Technical Professional")
  )+
  scale_shape_manual(
    values = c("Politician or Staff Member" = 1,
               "Lawyer" = 2,
               "Non-Profit Worker" = 3,
               "Service Based Professional" = 4,
               "Technical Professional" = 5),
    breaks = c("Politician or Staff Member", 
               "Lawyer",
               "Non-Profit Worker",
               "Service Based Professional",
               "Technical Professional")
  )+
  guides(
    fill = guide_legend(ncol=3),
    color= guide_legend(ncol=3),
    shape = guide_legend(ncol=3)
  )



ggsave(r_plot, filename = "figures/republican_logit.png", width = 10, height = 8, bg = "white")
ggsave(d_plot, filename = "figures/democratic_logit.png", width = 10, height = 8, bg = "white")




#
## national sample model
#
load("national.Rds")
tw_ideo = read_csv("control_data/taus_war_scores.csv") %>% 
  mutate(City = tolower(City))
##big cities by ideology score 
bigcities = national_sample %>% 
  mutate(City = tolower(City), label = label_1) %>% 
  merge(tw_ideo) %>% 
  mutate(mrp_ideology = scale(mrp_ideology)) %>% 
  mutate(place_fips = as.character(place_fips)) %>% 
  mutate(place_fips = ifelse(nchar(place_fips) == 6, 
                             paste0("0",place_fips), 
                             place_fips)) %>% 
  mutate(place_fips = ifelse(City == "louisville",
                             2148006,
                             place_fips)) %>% 
  mutate(place_fips = ifelse(City == "honolulu",
                             1571550,
                             place_fips))
##get census data for these cities
censusdata = read_csv("control_data/nhgis0008_csv/nhgis0008_ds254_20215_place.csv") %>% 
  mutate(
    place_fips = paste0(STATEA, PLACEA),
    total = AON4E001,
    white = AON5E002/total,
    black = AON5E003/total,
    asian = AON5E005/total,
    hispanic = AOODE003/total,
    bachelors = (AOP8E019 + AOP8E020 + AOP8E021 + AOP8E022 + AOP8E023 + AOP8E024 + AOP8E025)/total, 
    income = AOQIE001,
    ln_population = log(total)
  ) %>% 
  rename(
    total_population = total
  ) %>% 
  select(place_fips, total_population, ln_population, white, black, asian, hispanic, bachelors, income)


###occupation
occupationdata = read_csv("control_data/nhgis0009_csv/nhgis0009_ds255_20215_place.csv") %>% 
  mutate(
    place_fips = paste0(STATEA, PLACEA),
    #industry
    total = APGYE001,
    management_cen = APGYE002/total,
    service_cen = APGYE003/total,
    sales_cen = APGYE004/total,
    construction_cen = APGYE005/total,
    #omit: transport_cen,
    #type cats
    #omit: privatecompany_cen = 
    selfemploy_cen = (APGYE013 + APGYE031)/total,
    nonprofit_cen = APGYE019/total,
    govt_cen = APGYE025/total
  ) %>% 
  select(place_fips, total, management_cen, service_cen, sales_cen, 
         construction_cen, selfemploy_cen, nonprofit_cen, govt_cen)


us_cen_final = merge(censusdata, occupationdata)

bigcities_final = merge(bigcities, us_cen_final)



#model with tausanovitch warshaw ideology scores 
model_national = mblogit(factor(label) ~ mrp_ideology + ln_population 
        + white + log(income) + black + asian + hispanic + bachelors + 
          management_cen + service_cen + sales_cen + construction_cen + 
          selfemploy_cen + nonprofit_cen + govt_cen,
        data = bigcities_final)

ndata_n = data.frame(mrp_ideology = seq(-2.5, 2.5, by = 0.25), 
                   ln_population = median(bigcities_final$ln_population),
                   white = median(bigcities_final$white),
                   black = median(bigcities_final$black),
                   asian = median(bigcities_final$asian),
                   hispanic = median(bigcities_final$hispanic),
                   bachelors = median(bigcities_final$bachelors),
                   income = median(bigcities_final$income),
                   management_cen = median(bigcities_final$management_cen),
                   service_cen = median(bigcities_final$service_cen),
                   sales_cen = median(bigcities_final$sales_cen),
                   construction_cen = median(bigcities_final$construction_cen),
                   selfemploy_cen = median(bigcities_final$selfemploy_cen),
                   nonprofit_cen = median(bigcities_final$nonprofit_cen),
                   govt_cen = median(bigcities_final$govt_cen))



n_prediction = cbind(predict(model_national, newdata = ndata_n, se.fit = TRUE, type = "response")$fit %>% 
                       data.frame() %>% 
                       mutate(mrp_ideology = seq(-2.5, 2.5, by = 0.25)) %>% 
                       pivot_longer(cols = c(1:9), names_to = "cat", values_to = "share"),
                     predict(model_national, newdata = ndata_n, se.fit = TRUE, type = "response")$se.fit %>% 
                       data.frame() %>% 
                       pivot_longer(cols = everything(), names_to = "cat", values_to = "se") %>% 
                       select(se)) |> 
  mutate(cat = str_replace_all(cat, "\\.", " "))




n_plot = ggplot(n_prediction |> 
         filter(cat %in% c("Politician or Staff Member", 
                           "Business Owner Executive",
                           "Non Profit Worker",
                           "Military or Law Enforcement")) |> 
         mutate(mrp_i_flip = -1 * mrp_ideology),
       aes(x = mrp_i_flip, y = share, color = cat)) + 
  geom_point(aes(shape = cat), size = 2) +
  geom_smooth() +
  geom_ribbon(
    aes(
      ymin = share-(1.96*se),
      ymax = share+(1.96*se),
      color = cat,
      fill = cat
    ),
    alpha = 0.4
  ) + 
  labs(color= "Category", 
       fill = "Category",
       shape = "Category") + 
  scale_shape_manual(
    values = c(
      "Business Owner Executive" = 1,
      "Politician or Staff Member" = 2,
      "Military or Law Enforcement" = 3,
      "Non Profit Worker" = 4
    )
  )+
  theme_minimal() + 
  scale_y_continuous(labels = scales::percent) + 
  ylab("Share of City Council Members") + 
  xlab("Scaled Ideology Score") + 
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 14),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.position = "bottom",
        legend.direction = "horizontal") 



ggsave(n_plot, filename = "figures/national_logit.png", width = 10, height = 8, bg = "white")





#model with 2020 presidential election voteshare
natl_pres = haven::read_dta("control_data/aip_city_pvote.dta") |> 
  filter(year == 2020) |> 
  select(place_fips, demshare_pres) |> 
  mutate(place_fips = ifelse(nchar(place_fips) == 6, 
                             paste0("0",place_fips), 
                             place_fips))
bigcities_final = bigcities_final |> merge(natl_pres, all.x = T)

model_national = mblogit(factor(label) ~ demshare_pres + ln_population 
                         + white + log(income) + black + asian + hispanic + bachelors + 
                           management_cen + service_cen + sales_cen + construction_cen + 
                           selfemploy_cen + nonprofit_cen + govt_cen,
                         data = bigcities_final)

ndata_n = data.frame(demshare_pres = seq(0, 1, by = 0.05), 
                     ln_population = median(bigcities_final$ln_population),
                     white = median(bigcities_final$white),
                     black = median(bigcities_final$black),
                     asian = median(bigcities_final$asian),
                     hispanic = median(bigcities_final$hispanic),
                     bachelors = median(bigcities_final$bachelors),
                     income = median(bigcities_final$income),
                     management_cen = median(bigcities_final$management_cen),
                     service_cen = median(bigcities_final$service_cen),
                     sales_cen = median(bigcities_final$sales_cen),
                     construction_cen = median(bigcities_final$construction_cen),
                     selfemploy_cen = median(bigcities_final$selfemploy_cen),
                     nonprofit_cen = median(bigcities_final$nonprofit_cen),
                     govt_cen = median(bigcities_final$govt_cen))



n_prediction = cbind(predict(model_national, newdata = ndata_n, se.fit = TRUE, type = "response")$fit %>% 
                       data.frame() %>% 
                       mutate(demshare_pres = seq(0, 1, by = 0.05)) %>% 
                       pivot_longer(cols = c(1:9), names_to = "cat", values_to = "share"),
                     predict(model_national, newdata = ndata_n, se.fit = TRUE, type = "response")$se.fit %>% 
                       data.frame() %>% 
                       pivot_longer(cols = everything(), names_to = "cat", values_to = "se") %>% 
                       select(se)) |> 
  mutate(cat = str_replace_all(cat, "\\.", " "))




n_plot = ggplot(n_prediction |> 
                  filter(cat %in% c("Politician or Staff Member", 
                                    "Non Profit Worker",
                                    "Business Owner Executive",
                                    "Military or Law Enforcement")),
                aes(x = demshare_pres, y = share, color = cat)) + 
  geom_point(aes(shape = cat), size = 2) +
  geom_smooth() +
  geom_ribbon(
    aes(
      ymin = share-(1.96*se),
      ymax = share+(1.96*se),
      color = cat,
      fill = cat
    ),
    alpha = 0.4
  ) + 
  labs(color= "Category", 
       fill = "Category",
       shape = "Category") + 
  scale_shape_manual(
    values = c(
      "Business Owner Executive" = 1,
      "Politician or Staff Member" = 2,
      "Military or Law Enforcement" = 3,
      "Non Profit Worker" = 4
    )
  )+
  theme_minimal() + 
  scale_y_continuous(labels = scales::percent) + 
  scale_x_continuous(labels = scales::percent) + 
  ylab("Share of City Council Members") + 
  xlab("Democratic Voteshare") + 
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 14),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.position = "bottom",
        legend.direction = "horizontal") 


n_plot
ggsave(n_plot, filename = "figures/national_logit_p.png", width = 10, height = 8, bg = "white")




#display raw coefficients in appendix
coefs = as.data.frame(t(model_c2$coefmat))
ses = summary(model_c2)$coefficients[,2]
ses = matrix(ses, nrow = ncol(coefs), ncol = nrow(coefs)) |> as.data.frame() |> t()

for_xtable = matrix(nrow = nrow(coefs), ncol = ncol(coefs))

for(i in c(1:nrow(coefs))){
  for(j in c(1:ncol(coefs))){
    print(c(i,j))
    for_xtable[i,j] = paste0(round(coefs[i,j], 3), " (", round(ses[i,j], digits = 3), ")")
  }
}

pres = as.data.frame(for_xtable, row.names = row.names(coefs))
names(pres) = names(coefs)

xtable::xtable(pres[,1:4])
xtable::xtable(pres[,5:8])

#national
coefs = as.data.frame(t(model_national$coefmat))
ses = summary(model_national)$coefficients[,2]
ses = matrix(ses, nrow = ncol(coefs), ncol = nrow(coefs)) |> as.data.frame() |> t()

for_xtable = matrix(nrow = nrow(coefs), ncol = ncol(coefs))

for(i in c(1:nrow(coefs))){
  for(j in c(1:ncol(coefs))){
    print(c(i,j))
    for_xtable[i,j] = paste0(round(coefs[i,j], 3), " (", round(ses[i,j], digits = 3), ")")
  }
}
pres = as.data.frame(for_xtable, row.names = row.names(coefs))
colnames(pres) = colnames(coefs)
rownames(pres) = c("Intercept", "Scaled Ideology","Logged Population","White" , "Logged Income" ,    
 "Black"  , "Asian" , "Hispanic","Bachelors"  , "Management" ,
"Service" , "Sales", "Construction", "Self Employed", "Non-Profit Employed" ,  
"Government Employed" )

xtable::xtable(pres[,1:4])
xtable::xtable(pres[,5:8])
